In [1]:
import pandas as pd
import numpy as np
import math as math
import matplotlib.pyplot as plt
import statsmodels.api as sm
from plotnine import *
from statsmodels.formula.api import ols
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict
from sklearn import datasets, svm, tree
from scipy import stats
In [2]:
arms_sales = pd.read_csv('TIV-Import-All-1950-2020.csv')

# Filling all the NaN values with 0.0 for later calculation
arms_sales.fillna(value = 0, inplace = True)
arms_sales_original = arms_sales
arms_sales.head()
Out[2]:
Country 1950 1951 1952 1953 1954 1955 1956 1957 1958 ... 2012 2013 2014 2015 2016 2017 2018 2019 2020 Total
0 Afghanistan 0.0 0.0 0.0 0.0 0.0 4.0 7.0 89.0 254.0 ... 523.0 235.0 288.0 116.0 174.0 252.0 349.0 382.0 227.0 16391.0
1 African Union** 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 2.0 0.0 0.0 0.0 4.0 0.0 79.0
2 Albania 20.0 10.0 11.0 20.0 22.0 132.0 98.0 42.0 47.0 ... 15.0 7.0 7.0 7.0 0.0 4.0 2.0 5.0 0.0 2771.0
3 Algeria 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 661.0 362.0 620.0 898.0 2903.0 1128.0 1284.0 164.0 549.0 32139.0
4 Amal (Lebanon)* 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 60.0

5 rows × 73 columns

In [3]:
#================================================
#============= Moving Data Around ===============
#================================================

# Sorting the values by the 'Total' of the row to find the highest values in the data.

arms_sales.sort_values(by=['Total'], ascending=False, inplace=True)
In [4]:
# Rearranging values, bringing 'Total' to the first column in the df.

cols = arms_sales.columns.tolist()
cols = cols[-1:] + cols[:-1]
# cols

arms_sales = arms_sales[cols] 
arms_sales.head()
Out[4]:
Total Country 1950 1951 1952 1953 1954 1955 1956 1957 ... 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
260 1973386.0 Total 8139.0 11497.0 16533.0 24027.0 20371.0 20147.0 24262.0 23556.0 ... 30043.0 27995.0 27174.0 27073.0 28638.0 31438.0 31762.0 27028.0 27105.0 22831.0
94 126727.0 India 141.0 277.0 104.0 430.0 265.0 350.0 366.0 980.0 ... 3598.0 4392.0 5381.0 3347.0 3117.0 3003.0 2909.0 1485.0 3075.0 2799.0
39 80828.0 China 2572.0 2197.0 3430.0 3144.0 3500.0 1730.0 2255.0 2126.0 ... 1055.0 1571.0 1289.0 1077.0 1262.0 1139.0 1338.0 1962.0 1347.0 811.0
193 70781.0 Saudi Arabia 0.0 0.0 7.0 1.0 2.0 16.0 10.0 72.0 ... 1222.0 1033.0 1614.0 2740.0 3377.0 2961.0 3931.0 3315.0 3419.0 2466.0
58 67745.0 Egypt 163.0 57.0 0.0 1.0 142.0 725.0 1065.0 479.0 ... 630.0 281.0 673.0 420.0 1436.0 1690.0 2395.0 1677.0 1046.0 1311.0

5 rows × 73 columns

In [5]:
# Dropping row that is the total of all values. Not useful for the analysis. This part is optional.

arms_sales = arms_sales.drop(labels=[260], axis=0)
arms_sales.head()
Out[5]:
Total Country 1950 1951 1952 1953 1954 1955 1956 1957 ... 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
94 126727.0 India 141.0 277.0 104.0 430.0 265.0 350.0 366.0 980.0 ... 3598.0 4392.0 5381.0 3347.0 3117.0 3003.0 2909.0 1485.0 3075.0 2799.0
39 80828.0 China 2572.0 2197.0 3430.0 3144.0 3500.0 1730.0 2255.0 2126.0 ... 1055.0 1571.0 1289.0 1077.0 1262.0 1139.0 1338.0 1962.0 1347.0 811.0
193 70781.0 Saudi Arabia 0.0 0.0 7.0 1.0 2.0 16.0 10.0 72.0 ... 1222.0 1033.0 1614.0 2740.0 3377.0 2961.0 3931.0 3315.0 3419.0 2466.0
58 67745.0 Egypt 163.0 57.0 0.0 1.0 142.0 725.0 1065.0 479.0 ... 630.0 281.0 673.0 420.0 1436.0 1690.0 2395.0 1677.0 1046.0 1311.0
103 65890.0 Japan 0.0 0.0 8.0 223.0 314.0 408.0 757.0 1059.0 ... 268.0 205.0 252.0 307.0 329.0 330.0 441.0 654.0 899.0 724.0

5 rows × 73 columns

In [6]:
# Only taking the Top 50 values from the data for later analysis and resetting index values

arms_sales = arms_sales.head(50)
arms_sales.reset_index(drop=True, inplace=True)
arms_sales
Out[6]:
Total Country 1950 1951 1952 1953 1954 1955 1956 1957 ... 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
0 126727.0 India 141.0 277.0 104.0 430.0 265.0 350.0 366.0 980.0 ... 3598.0 4392.0 5381.0 3347.0 3117.0 3003.0 2909.0 1485.0 3075.0 2799.0
1 80828.0 China 2572.0 2197.0 3430.0 3144.0 3500.0 1730.0 2255.0 2126.0 ... 1055.0 1571.0 1289.0 1077.0 1262.0 1139.0 1338.0 1962.0 1347.0 811.0
2 70781.0 Saudi Arabia 0.0 0.0 7.0 1.0 2.0 16.0 10.0 72.0 ... 1222.0 1033.0 1614.0 2740.0 3377.0 2961.0 3931.0 3315.0 3419.0 2466.0
3 67745.0 Egypt 163.0 57.0 0.0 1.0 142.0 725.0 1065.0 479.0 ... 630.0 281.0 673.0 420.0 1436.0 1690.0 2395.0 1677.0 1046.0 1311.0
4 65890.0 Japan 0.0 0.0 8.0 223.0 314.0 408.0 757.0 1059.0 ... 268.0 205.0 252.0 307.0 329.0 330.0 441.0 654.0 899.0 724.0
5 57094.0 Turkey 117.0 136.0 315.0 514.0 473.0 335.0 198.0 544.0 ... 780.0 1503.0 793.0 1524.0 438.0 328.0 428.0 487.0 754.0 86.0
6 56204.0 Germany 0.0 0.0 0.0 0.0 0.0 85.0 1356.0 2270.0 ... 84.0 131.0 113.0 95.0 96.0 55.0 43.0 14.0 33.0 33.0
7 55883.0 South Korea 155.0 256.0 266.0 263.0 158.0 354.0 454.0 278.0 ... 1570.0 1078.0 191.0 723.0 254.0 1058.0 1052.0 1100.0 1480.0 1317.0
8 54271.0 Iraq 0.0 0.0 59.0 53.0 19.0 61.0 105.0 38.0 ... 574.0 476.0 353.0 665.0 1464.0 1789.0 1004.0 543.0 175.0 1.0
9 47226.0 United States 352.0 695.0 875.0 1315.0 1896.0 1892.0 2071.0 1371.0 ... 1012.0 1207.0 791.0 590.0 513.0 451.0 516.0 380.0 866.0 687.0
10 46629.0 Iran 4.0 6.0 0.0 1.0 75.0 110.0 121.0 53.0 ... 98.0 44.0 13.0 13.0 13.0 413.0 4.0 4.0 3.0 2.0
11 44779.0 Israel 53.0 92.0 73.0 112.0 49.0 117.0 641.0 133.0 ... 85.0 120.0 209.0 541.0 681.0 605.0 536.0 543.0 546.0 474.0
12 43252.0 Poland 190.0 575.0 794.0 1021.0 1999.0 1098.0 687.0 1080.0 ... 137.0 223.0 145.0 259.0 140.0 55.0 154.0 74.0 325.0 193.0
13 42706.0 Taiwan 0.0 98.0 304.0 383.0 320.0 622.0 256.0 436.0 ... 197.0 422.0 548.0 1077.0 629.0 97.0 493.0 99.0 64.0 102.0
14 42598.0 Australia 468.0 371.0 410.0 153.0 125.0 637.0 451.0 445.0 ... 1557.0 867.0 245.0 906.0 1464.0 1026.0 1653.0 1557.0 1184.0 1658.0
15 40278.0 Greece 97.0 500.0 307.0 85.0 152.0 203.0 92.0 354.0 ... 70.0 18.0 28.0 204.0 749.0 323.0 51.0 38.0 93.0 85.0
16 40048.0 Syria 106.0 1.0 20.0 18.0 9.0 74.0 106.0 381.0 ... 493.0 516.0 506.0 21.0 22.0 8.0 25.0 181.0 0.0 72.0
17 38819.0 Pakistan 194.0 169.0 145.0 95.0 145.0 195.0 158.0 529.0 ... 1128.0 1014.0 1095.0 828.0 779.0 837.0 837.0 799.0 521.0 759.0
18 35317.0 United Kingdom 105.0 246.0 532.0 1591.0 512.0 315.0 126.0 90.0 ... 366.0 583.0 494.0 211.0 397.0 259.0 1002.0 519.0 342.0 764.0
19 32406.0 Canada 264.0 294.0 998.0 1884.0 1123.0 1274.0 721.0 764.0 ... 336.0 197.0 202.0 316.0 408.0 233.0 331.0 160.0 174.0 207.0
20 32219.0 Italy 158.0 232.0 883.0 1309.0 308.0 587.0 983.0 773.0 ... 286.0 207.0 81.0 141.0 253.0 760.0 735.0 317.0 183.0 131.0
21 32139.0 Algeria 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1125.0 661.0 362.0 620.0 898.0 2903.0 1128.0 1284.0 164.0 549.0
22 31079.0 Libya 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 ... 8.0 73.0 116.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
23 30708.0 Netherlands 154.0 454.0 423.0 975.0 784.0 736.0 1171.0 695.0 ... 120.0 254.0 451.0 9.0 60.0 75.0 33.0 143.0 497.0 610.0
24 30552.0 East Germany (GDR) 29.0 0.0 178.0 92.0 10.0 0.0 678.0 1122.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
25 30039.0 Soviet Union 36.0 0.0 0.0 8.0 70.0 945.0 1538.0 1500.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
26 29352.0 Czechoslovakia 90.0 247.0 707.0 1607.0 1003.0 1017.0 573.0 253.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
27 29281.0 UAE 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1235.0 1154.0 2306.0 768.0 1224.0 956.0 966.0 1196.0 691.0 432.0
28 26954.0 Viet Nam 0.0 0.0 0.0 0.0 10.0 19.0 9.0 54.0 ... 1010.0 780.0 362.0 1212.0 855.0 909.0 783.0 604.0 160.0 41.0
29 25618.0 Spain 1.0 0.0 0.0 78.0 88.0 217.0 273.0 287.0 ... 168.0 224.0 155.0 109.0 135.0 86.0 82.0 142.0 40.0 57.0
30 23975.0 North Korea 861.0 372.0 211.0 674.0 263.0 404.0 479.0 428.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
31 20353.0 France 218.0 988.0 1180.0 1739.0 1659.0 1578.0 1446.0 738.0 ... 48.0 90.0 107.0 17.0 42.0 45.0 76.0 183.0 99.0 145.0
32 19928.0 Bulgaria 156.0 63.0 97.0 349.0 617.0 307.0 305.0 514.0 ... 29.0 5.0 0.0 0.0 3.0 0.0 0.0 0.0 2.0 44.0
33 19538.0 Indonesia 195.0 58.0 14.0 6.0 16.0 25.0 42.0 30.0 ... 249.0 212.0 801.0 1177.0 438.0 377.0 1171.0 329.0 217.0 270.0
34 19163.0 Singapore 0.0 0.0 0.0 0.0 0.0 0.0 6.0 0.0 ... 895.0 745.0 749.0 65.0 147.0 633.0 430.0 594.0 636.0 74.0
35 18439.0 Belgium 198.0 485.0 972.0 1098.0 604.0 361.0 682.0 680.0 ... 21.0 32.0 46.0 104.0 56.0 4.0 7.0 1.0 9.0 86.0
36 18170.0 Romania 4.0 76.0 454.0 904.0 629.0 359.0 448.0 186.0 ... 80.0 21.0 20.0 0.0 22.0 177.0 88.0 37.0 13.0 187.0
37 17476.0 Yugoslavia 0.0 182.0 490.0 525.0 534.0 334.0 352.0 229.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
38 17094.0 Brazil 0.0 615.0 0.0 133.0 112.0 379.0 55.0 152.0 ... 306.0 327.0 223.0 209.0 219.0 109.0 96.0 269.0 168.0 158.0
39 17026.0 Norway 113.0 127.0 426.0 421.0 19.0 168.0 282.0 142.0 ... 606.0 143.0 51.0 16.0 143.0 140.0 352.0 532.0 379.0 450.0
40 16391.0 Afghanistan 0.0 0.0 0.0 0.0 0.0 4.0 7.0 89.0 ... 661.0 523.0 235.0 288.0 116.0 174.0 252.0 349.0 382.0 227.0
41 14798.0 Thailand 62.0 88.0 61.0 44.0 60.0 34.0 10.0 88.0 ... 268.0 278.0 357.0 95.0 174.0 336.0 302.0 627.0 295.0 132.0
42 14173.0 Hungary 56.0 53.0 13.0 802.0 699.0 222.0 1.0 59.0 ... 9.0 0.0 2.0 7.0 0.0 14.0 6.0 27.0 16.0 67.0
43 13128.0 Venezuela 61.0 0.0 25.0 195.0 135.0 66.0 406.0 285.0 ... 596.0 687.0 993.0 181.0 167.0 137.0 0.0 0.0 0.0 51.0
44 13017.0 Peru 2.0 24.0 76.0 0.0 111.0 35.0 175.0 117.0 ... 63.0 10.0 22.0 152.0 212.0 223.0 65.0 94.0 0.0 19.0
45 12414.0 Sweden 225.0 297.0 316.0 431.0 319.0 433.0 555.0 253.0 ... 191.0 207.0 56.0 45.0 14.0 70.0 46.0 7.0 7.0 3.0
46 12279.0 Switzerland 154.0 151.0 321.0 169.0 142.0 230.0 313.0 223.0 ... 32.0 10.0 6.0 5.0 12.0 73.0 34.0 10.0 13.0 10.0
47 12088.0 Argentina 75.0 445.0 38.0 4.0 5.0 0.0 39.0 14.0 ... 15.0 3.0 12.0 7.0 21.0 17.0 47.0 21.0 48.0 7.0
48 11897.0 Jordan 0.0 0.0 0.0 0.0 27.0 34.0 62.0 0.0 ... 215.0 176.0 75.0 197.0 233.0 232.0 389.0 238.0 224.0 148.0
49 11893.0 Cuba 0.0 8.0 0.0 0.0 3.0 13.0 35.0 8.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

50 rows × 73 columns

In [7]:
#================================================
#============= Restructuring Data ===============
#================================================

# Reshaping the data just like project 3 for easier calculations and familiarity.

# Putting all years and countries into separate lists
years = arms_sales.iloc[1:4, 2:].columns.tolist()
countries = arms_sales['Country'].tolist()

# Initializing empty arrays to append data
country_list = []
year_list = []
import_list = []
totals_list = []
year_mean_list = []

# Double for-loop through all countries and years
for country in countries:
    for year in years:
        # Appending values
        country_list.append(country)
        year_list.append(year)

        # Index of row where Country = Country (ex: Counter == 'China') which will be used to locate value.
        country_index = arms_sales.index[arms_sales['Country'] == country][0]
        import_value = arms_sales.at[country_index, year]
        total_value = arms_sales.at[country_index, 'Total']
        year_mean = arms_sales[year].mean()

        # Appending retrieved values and calculations
        import_list.append(import_value)
        totals_list.append(total_value)
        year_mean_list.append(year_mean)


# N_O_T_E: Entire average over top 50 can be accessed through this quick calculation:
total_mean_over_top_50 = arms_sales['Total'].mean()

# Structuring data
data = {
    'country': country_list,
    'year': year_list,
    'imports': import_list,
    'year_avg_imports': year_mean_list,
    'total_imports_for_country': totals_list
    }


# Placing data into pandas dataframe. N_O_T_E: Remember you can always access the original because it still exists (i.e. arms_sales). There is also the most original reference stored in arms_sales_original, which has all the intial data in addition to the top 50. 
small_arms_df = pd.DataFrame(data)

small_arms_df
Out[7]:
country year imports year_avg_imports total_imports_for_country
0 India 1950 141.0 156.58 126727.0
1 India 1951 277.0 218.70 126727.0
2 India 1952 104.0 310.64 126727.0
3 India 1953 430.0 457.00 126727.0
4 India 1954 265.0 390.10 126727.0
... ... ... ... ... ...
3545 Cuba 2016 0.0 502.20 11893.0
3546 Cuba 2017 0.0 524.62 11893.0
3547 Cuba 2018 0.0 451.90 11893.0
3548 Cuba 2019 0.0 411.78 11893.0
3549 Cuba 2020 0.0 368.98 11893.0

3550 rows × 5 columns

In [8]:
#================================================
#============ Plots using all data ==============
#================================================

small_arms_df['year'] = small_arms_df['year'].astype(int)

x = small_arms_df['year'].values
y = small_arms_df['year_avg_imports'].values
plt.scatter(x, y)
plt.xlabel("Year")
plt.ylabel("Average Imports")
Out[8]:
Text(0, 0.5, 'Average Imports')
2021-05-09T09:11:39.018465 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
In [9]:
ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df) + stat_smooth(method="lm") + geom_point(size=2) +\
    labs(title="All Imports 1950-2020",
         x = "Years",
         y = "Imports")
2021-05-09T09:11:40.115481 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[9]:
<ggplot: (194654591922)>
In [10]:
#================================================
#==== Filtering data for later graphing =========
#================================================

# All filtered data to use in individual graphs later.

small_arms_df['year'] = small_arms_df['year'].astype(int)

filt_1950_1960 = (small_arms_df['year'] >= 1950) & (small_arms_df['year'] <= 1960)
filt_1960_1970 = (small_arms_df['year'] >= 1960) & (small_arms_df['year'] <= 1970)
filt_1970_1980 = (small_arms_df['year'] >= 1970) & (small_arms_df['year'] <= 1980)
filt_1980_1990 = (small_arms_df['year'] >= 1980) & (small_arms_df['year'] <= 1990)
filt_1990_2000 = (small_arms_df['year'] >= 1990) & (small_arms_df['year'] <= 2000)
filt_2000_2010 = (small_arms_df['year'] >= 2000) & (small_arms_df['year'] <= 2010)
filt_2010_2020 = (small_arms_df['year'] >= 2010) & (small_arms_df['year'] <= 2020)
In [11]:
#================================================
#==== Fitting a linear regression model =========
#================================================

# N_O_T_E: Same calculations from project3. I used the year_average as the dependant variable here, I was likely wrong, so please let me know your opinion. Honestly not good at this. The only relationship I know these variables have is that the number of imports will increase depending on the year (wartime). Don't we need some kind of expected values for fitting a model?

small_arms_df['year'] = small_arms_df['year'].astype(int)

years = np.array(small_arms_df['year'])
imports = np.array(small_arms_df['imports'])
LR = LinearRegression()
LR.fit(years.reshape(-1,1), imports)
prediction = LR.predict(years.reshape(-1,1))
small_arms_df['importsPredict'] = pd.Series(prediction, index=small_arms_df.index)
small_arms_df['residual'] = small_arms_df['imports'] - small_arms_df['importsPredict']

RM = ols(formula='year ~ year_avg_imports', data=small_arms_df).fit()
RM.summary()
Out[11]:
OLS Regression Results
Dep. Variable: year R-squared: 0.000
Model: OLS Adj. R-squared: -0.000
Method: Least Squares F-statistic: 0.2967
Date: Sun, 09 May 2021 Prob (F-statistic): 0.586
Time: 09:11:41 Log-Likelihood: -15759.
No. Observations: 3550 AIC: 3.152e+04
Df Residuals: 3548 BIC: 3.153e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 1985.6562 1.253 1584.758 0.000 1983.200 1988.113
year_avg_imports -0.0014 0.003 -0.545 0.586 -0.006 0.004
Omnibus: 2691.101 Durbin-Watson: 0.165
Prob(Omnibus): 0.000 Jarque-Bera (JB): 209.039
Skew: -0.015 Prob(JB): 4.05e-46
Kurtosis: 1.812 Cond. No. 1.83e+03


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.83e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
In [12]:
#================================================
#====== Creating Bins for slicing years =========
#================================================

# Approach like from project2, cutting the data up and displaying from there. 

bins = [1949, 1960, 1970, 1980, 1990, 2000, 2010, 2020]
groups = ["1950-1960", "1960-1970", "1970-1980", "1980-1990", "1990-2000", "2000-2010", "2010-2020"]

# Created new column that depicts which range in years a row falls into.
small_arms_df['cut_year'] = pd.cut(small_arms_df.year, bins, labels=groups)

small_arms_df
Out[12]:
country year imports year_avg_imports total_imports_for_country importsPredict residual cut_year
0 India 1950 141.0 156.58 126727.0 484.059343 -343.059343 1950-1960
1 India 1951 277.0 218.70 126727.0 483.997945 -206.997945 1950-1960
2 India 1952 104.0 310.64 126727.0 483.936547 -379.936547 1950-1960
3 India 1953 430.0 457.00 126727.0 483.875150 -53.875150 1950-1960
4 India 1954 265.0 390.10 126727.0 483.813752 -218.813752 1950-1960
... ... ... ... ... ... ... ... ...
3545 Cuba 2016 0.0 502.20 11893.0 480.007093 -480.007093 2010-2020
3546 Cuba 2017 0.0 524.62 11893.0 479.945696 -479.945696 2010-2020
3547 Cuba 2018 0.0 451.90 11893.0 479.884298 -479.884298 2010-2020
3548 Cuba 2019 0.0 411.78 11893.0 479.822900 -479.822900 2010-2020
3549 Cuba 2020 0.0 368.98 11893.0 479.761502 -479.761502 2010-2020

3550 rows × 8 columns

In [13]:
# Wrong af, but just doing it anyway.

small_arms_df['year'] = small_arms_df['year'].astype(str)
ggplot(aes(x='cut_year', y='residual'), data=small_arms_df) + geom_violin() + labs(title="Residuals vs. Year", x="Years", y="Residuals")
2021-05-09T09:11:42.934480 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[13]:
<ggplot: (194654591835)>
In [14]:
#================================================
#=============== Multi graphs ===================
#================================================

# Imports in 10 year time periods

small_arms_df['year'] = small_arms_df['year'].astype(int)

ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df) + \
geom_point() + \
geom_smooth(method='lm') + \
facet_grid("cut_year ~ .") +\
    labs(title="Increments by 10 years",
         x = "Years",
         y = "Imports")
2021-05-09T09:11:45.016467 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[14]:
<ggplot: (194655931766)>
In [15]:
# Just another version of the graph above.

ggplot(aes(x='year', y='year_avg_imports', color='cut_year'), data=small_arms_df) + stat_smooth(method="lm") + geom_point()
2021-05-09T09:11:48.028466 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[15]:
<ggplot: (194655931721)>
In [16]:
#================================================
#=============== Individual graphs ==============
#================================================

small_arms_df['year'] = small_arms_df['year'].astype(int)

ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df.loc[filt_1950_1960]) + stat_smooth(method="lm") + geom_point(size=1) +\
    labs(title="All Imports 1950-1960",
         x = "Years",
         y = "Imports on Average")
2021-05-09T09:11:50.056466 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[16]:
<ggplot: (194655911159)>
In [17]:
ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df.loc[filt_1960_1970]) + stat_smooth(method="lm") + geom_point(size=1) +\
    labs(title="All Imports 1960-1970",
         x = "Years",
         y = "Imports on Average")
2021-05-09T09:11:50.838467 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[17]:
<ggplot: (194656354850)>
In [18]:
ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df.loc[filt_1970_1980]) + stat_smooth(method="lm") + geom_point(size=1) +\
    labs(title="All Imports 1970-1980",
         x = "Years",
         y = "Imports on Average")
2021-05-09T09:11:51.704468 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[18]:
<ggplot: (194655936988)>
In [19]:
ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df.loc[filt_1980_1990]) + stat_smooth(method="lm") + geom_point(size=1) +\
    labs(title="All Imports 1980-1990",
         x = "Years",
         y = "Imports on Average")
2021-05-09T09:11:52.495469 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[19]:
<ggplot: (194656338487)>
In [20]:
ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df.loc[filt_1990_2000]) + stat_smooth(method="lm") + geom_point(size=1) +\
    labs(title="All Imports 1990-2000",
         x = "Years",
         y = "Imports on Average")
2021-05-09T09:11:53.262482 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[20]:
<ggplot: (194656124564)>
In [21]:
ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df.loc[filt_2000_2010]) + stat_smooth(method="lm") + geom_point(size=1) +\
    labs(title="All Imports 2000-2010",
         x = "Years",
         y = "Imports on Average")
2021-05-09T09:11:54.080468 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[21]:
<ggplot: (194656217242)>
In [22]:
ggplot(aes(x='year', y='year_avg_imports'), data=small_arms_df.loc[filt_2010_2020]) + stat_smooth(method="lm") + geom_point(size=1) +\
    labs(title="All Imports 2010-2020",
         x = "Years",
         y = "Imports on Average")
2021-05-09T09:11:54.824468 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/
Out[22]:
<ggplot: (194656093826)>